####################################################################################################################
####################################################################################################################
####Non-state Atrocities in Capital Cities - CEM Matching Analysis: Nighttime Ligth Cells in Afflicted Countries####
####################################################################################################################
####################################################################################################################
library(plyr)
library(cem)
library(foreign)
library(doBy)
library(plyr)
library(mvtnorm)

##############################
###Create Data for Matching###
##############################
# Set working library
setwd("~/Data/Global Analysis/")
# Read in main data
main.data <- read.dta("full.grid.upd.dta")
###Subset data for only countries that experienced at least one insurgent atrocity
#Summarize insurgent atrocities by country
at.count <- summaryBy(incidentnonstatefull ~ ccode, FUN=c("sum"), data=main.data, na.rm=TRUE)
names(at.count) <- c("ccode", "incidentnonstatefull.sum.count")
#Join with main dataset
at.count.join <- join(main.data, at.count)
#Subset only grid-cell years in countries that experienced at least one insurgent atrocity within the period of analysis
main.data.at <- subset(at.count.join,  at.count.join$incidentnonstatefull.sum.count>0)
##Finally, subset only areas with NTL within this sample
main.data.at.nl <- subset(main.data.at, laglnNL_sum>0)
#Next, keep the variables used for matching and remove NAs
prop.at.nl<-na.omit(main.data.at.nl[,c("NSAtBin", "lag_Capital", "loglagttime", "loglagppp", "loglagbdist1", "loglagpop")])
#Create a binary dependent variable denoting whether a given grid cell experienced at least on insurgent atrocity or not in a given year
prop.at.nl$NSAtBin <- ifelse(prop.at.nl$NSAtBin>0,1,0)
#Make sure both the treatment and DV are integers
prop.at.nl$NSAtBin <- as.integer(prop.at.nl$NSAtBin)
prop.at.nl$lag_Capital <- as.integer(prop.at.nl$lag_Capital)

######################
###Cell Year Sample###
######################
###Standard CEM (without extrapolation)
#Create a matrix
mat.at.nl <- cem(treatment="lag_Capital", data=prop.at.nl, keep.all = TRUE, drop="NSAtBin")
mat.at.nl
#Regress on treatment
log.t.at.nl <-  att(mat.at.nl, NSAtBin~lag_Capital, data=prop.at.nl, model='logit', extrapolate = FALSE)
summary(log.t.at.nl)
#Plot the results
pdf("cematnl.pdf")
plot(log.t.at.nl, mat.at.nl, prop.at.nl, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()

###CEM with extrapolation
#Regress on treatment
log.t.e.at.nl <-  att(mat.at.nl, NSAtBin~lag_Capital, data=prop.at.nl, model='logit', extrapolate = TRUE)
summary(log.t.e.at.nl)
#Plot the results
pdf("cemextatnl.pdf")
plot(log.t.e.at.nl, mat.at.nl, prop.at.nl, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()

#####################################################
###Values Averged by Cell for the 1996-2009 Period###
#####################################################
#collapse
prop.i <- summaryBy(NSAtBin+lag_Capital+loglagppp+loglagttime+loglagbdist1+loglagpop ~ gid, FUN=c("mean"), data=main.data.at.nl, na.rm=TRUE)
names(prop.i) <- c("gid", "NSAtBin", "lag_Capital", "loglagttime","lagppp", "lagbdist1", "loglagpop")
#Remove NAs
prop.i <-na.omit(prop.i)
prop.i$lag_Capital <- as.numeric(ifelse(prop.i$lag_Capital>0,1,0))
prop.i$NSAtBin <- as.numeric(ifelse(prop.i$NSAtBin>0,1,0))
#Remove the GID indicator from dataset
prop.i$gid <- NULL

###Standard CEM (without extrapolation)
#Create a matrix
matg.nl <- cem(treatment="lag_Capital", data=prop.i, keep.all = TRUE, drop="NSAtBin")
matg.nl
#Regress on treatment
log.t.g.nl <-  att(matg.nl, NSAtBin~lag_Capital, data=prop.i, model='logit', extrapolate = FALSE)
summary(log.t.g.nl)
#Plot the results
pdf("cemgidatnl.pdf")
plot(log.t.g.nl, matg.nl, prop.i, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()

#CEM with extraploation
#Regress on treatment
log.t.g.e.nl <-  att(matg.nl, NSAtBin~lag_Capital, data=prop.i, model='logit', extrapolate = TRUE)
summary(log.t.g.e.nl)
#Plot the results
pdf("cemgidextatnl.pdf")
plot(log.t.g.e.nl, matg.nl, prop.i, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()

